# iris.csv file
# First pass: let readr guess the column types so they can be inspected
# before a stricter specification is written.
iris_sample <- read_csv("../data/iris.csv")
## Rows: 150 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Species
## dbl (4): Sepal.Length, Sepal.Width, Petal.Length, Petal.Width
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print the full column specification that readr inferred for the sample read.
spec(iris_sample)
## cols(
## Sepal.Length = col_double(),
## Sepal.Width = col_double(),
## Petal.Length = col_double(),
## Petal.Width = col_double(),
## Species = col_character()
## )
# Re-read the iris data with explicit column types: every measurement column
# is a double, and Species is parsed as a factor instead of plain text.
iris_data <- read_csv(
  "../data/iris.csv",
  col_types = cols(
    .default = col_double(),
    Species = col_factor()
  )
)

# Preview the first rows to confirm the parsed types.
head(iris_data)
# plotly
# Overlaid, semi-transparent histograms of sepal length, one per species,
# using a fixed bin width of 0.3.
plot_ly(
  data = iris_data,
  x = ~Sepal.Length,
  color = ~Species,
  type = "histogram",
  opacity = 0.6,
  xbins = list(size = 0.3)
) |>
  layout(
    title = "Histogram of Sepal Length for Each Species",
    xaxis = list(title = "Sepal Length"),
    yaxis = list(title = "Count"),
    # "overlay" draws the species on top of each other instead of stacking.
    barmode = "overlay"
  )
# The same sepal-length histogram built with ggplot2: the species are drawn
# on top of each other (position = "identity") with partial transparency.
gg_histogram <- ggplot(
  data = iris_data,
  mapping = aes(x = Sepal.Length, fill = Species)
) +
  geom_histogram(binwidth = 0.3, position = "identity", alpha = 0.6) +
  ggtitle("Histogram of Sepal Length for Each Species") +
  xlab("Sepal Length") +
  ylab("Count") +
  theme_minimal()

# Convert the static ggplot object into an interactive plotly widget.
ggplotly(gg_histogram)
# Reshape to long format: every measurement column (everything except
# Species) becomes a Metric / Value pair.
iris_long <- pivot_longer(
  iris_data,
  cols = !Species,
  names_to = "Metric",
  values_to = "Value"
)
# One histogram panel per metric, arranged in a 2 x 2 grid.
metrics <- unique(iris_long$Metric)
plots <- lapply(seq_along(metrics), function(i) {
  plot_ly(
    data = filter(iris_long, Metric == metrics[i]),
    x = ~Value,
    color = ~Species,
    # Group each species' traces across panels so that clicking a legend
    # entry toggles that species in every panel, not only in the first one.
    legendgroup = ~Species,
    type = "histogram",
    opacity = 0.6,
    # Only the first panel contributes legend entries, to avoid duplicates.
    showlegend = (i == 1)
  )
})
subplot(plots, nrows = 2, shareX = TRUE, shareY = TRUE) |>
  layout(
    title = "Histograms of Iris Metrics by Species",
    showlegend = TRUE
  )
Petal.Length clearly provides the best species separation.
# One box-plot panel per metric, arranged in a 2 x 2 grid.
plots <- lapply(seq_along(metrics), function(i) {
  plot_ly(
    data = filter(iris_long, Metric == metrics[i]),
    x = ~Species,
    y = ~Value,
    color = ~Species,
    # Group each species' traces across panels so that clicking a legend
    # entry toggles that species in every panel, not only in the first one.
    legendgroup = ~Species,
    type = "box",
    # Only the first panel contributes legend entries, to avoid duplicates.
    showlegend = (i == 1)
  )
})
subplot(plots, nrows = 2, shareX = TRUE, shareY = TRUE) |>
  layout(
    title = "Box Plots of Iris Metrics by Species",
    showlegend = TRUE
  )
# 2-D scatter of petal width against petal length, coloured by species.
iris_data |>
  plot_ly(
    type = "scatter",
    mode = "markers",
    x = ~Petal.Length,
    y = ~Petal.Width,
    color = ~Species
  )
# 3-D scatter: petal length and width on x / y, sepal length on z,
# coloured by species.
iris_data |>
  plot_ly(
    type = "scatter3d",
    mode = "markers",
    x = ~Petal.Length,
    y = ~Petal.Width,
    z = ~Sepal.Length,
    color = ~Species
  )
Setosa is clearly separable, while Versicolor and Virginica overlap.
# us-states.csv
# First pass: let readr guess the column types so they can be inspected
# before a stricter specification is written.
usstates_sample <- read_csv("../data/us-states.csv")
## Rows: 50686 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): state, fips
## dbl (2): cases, deaths
## date (1): date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print the full column specification that readr inferred for the sample read.
spec(usstates_sample)
## cols(
## date = col_date(format = ""),
## state = col_character(),
## fips = col_character(),
## cases = col_double(),
## deaths = col_double()
## )
# Re-read the state-level data with explicit column types.
usstates <- read_csv(
  "../data/us-states.csv",
  col_types = cols(
    date = col_date(format = "%Y-%m-%d"),
    state = col_factor(),
    # fips is an identifier code, not a quantity. readr's own guess for this
    # column is character; keeping it as character preserves the codes
    # exactly as written instead of coercing them to numbers.
    fips = col_character(),
    cases = col_double(),
    deaths = col_double()
  )
)

# Preview the first rows to confirm the parsed types.
head(usstates)
# New cases per state and calendar month, derived from the running total in
# `cases`.
#
# The monthly increase is the month-end total minus the previous month-end
# total for the same state. Subtracting the month's first value from its last
# (the previous approach) drops the cases reported between the last report of
# the prior month and the first report of this one, and also drops a state's
# very first reported count.
monthly_cases <- usstates |>
  mutate(year_month = floor_date(date, "month")) |>
  # Sort explicitly so that last() really is the month-end value, regardless
  # of the row order in the file.
  arrange(state, date) |>
  group_by(state, year_month) |>
  summarize(month_end_cases = last(cases), .groups = "drop_last") |>
  # Still grouped by state here; months are in ascending order within a state.
  # default = 0: before its first report a state has zero cumulative cases.
  mutate(
    new_cases = month_end_cases - dplyr::lag(month_end_cases, default = 0)
  ) |>
  ungroup() |>
  select(state, year_month, new_cases)

head(monthly_cases)
# Monthly new cases as one line per state.
# Note: there are more states than the default "Set2" palette has colours
# (8), so RColorBrewer warns and plotly falls back to interpolated colours.
monthly_cases |>
  plot_ly(
    type = "scatter",
    mode = "lines",
    x = ~year_month,
    y = ~new_cases,
    color = ~state
  )
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
# Restrict the monthly series to New York and plot it as a line with markers.
ny_monthly_cases <- monthly_cases |>
  filter(state == "New York")

ny_monthly_cases |>
  plot_ly(
    type = "scatter",
    mode = "lines+markers",
    x = ~year_month,
    y = ~new_cases
  )
# Month in which New York recorded its highest number of new cases.
# slice_max(..., with_ties = FALSE) guarantees exactly one row, so
# `peak_month` is always a single date; with a tie, the previous
# `filter(new_cases == max(new_cases))` would return several months and the
# `==` comparison below would silently recycle them.
ny_peak <- ny_monthly_cases |>
  slice_max(new_cases, n = 1, with_ties = FALSE)
ny_peak
peak_month <- pull(ny_peak, year_month)

# All states' new cases for that month, with two-letter codes for mapping.
peak_month_cases <- monthly_cases |>
  filter(year_month == peak_month) |>
  mutate(
    # state.name / state.abb cover only the 50 states, so any other entry
    # gets NA here; District of Columbia is patched in by hand.
    state_abb = state.abb[match(state, state.name)],
    state_abb = ifelse(state == "District of Columbia", "DC", state_abb)
  )
# Choropleth of new cases per state for the peak month.
plot_ly(
  data = peak_month_cases,
  locations = ~state_abb,
  locationmode = "USA-states",
  z = ~new_cases,
  type = "choropleth",
  colorscale = "Viridis"
) |>
  layout(
    # Without an explicit scope the geo axis defaults to a world map, on
    # which the coloured states are tiny; zoom to the USA instead.
    geo = list(scope = "usa", projection = list(type = "albers usa"))
  )
# Data for the animated map: the month is converted to text so it can serve
# as the animation frame label, and each state gets its two-letter code.
# Lookup table: the 50 states from base R plus a manual entry for DC; names
# missing from the table yield NA.
abb_by_state <- c(
  setNames(state.abb, state.name),
  "District of Columbia" = "DC"
)
animated_data <- mutate(
  monthly_cases,
  year_month = as.character(year_month),
  state_abb = unname(abb_by_state[as.character(state)])
)
# Animated choropleth: one frame per month of new cases by state.
plot_ly(
  data = animated_data,
  locations = ~state_abb,
  locationmode = "USA-states",
  z = ~new_cases,
  # Pin the colour range to the overall min / max so that a given colour
  # means the same number of cases in every frame; otherwise each frame is
  # auto-scaled independently and months cannot be compared visually.
  zmin = min(animated_data$new_cases, na.rm = TRUE),
  zmax = max(animated_data$new_cases, na.rm = TRUE),
  frame = ~year_month,
  type = "choropleth",
  colorscale = "Viridis"
) |>
  layout(
    # Without an explicit scope the geo axis defaults to a world map, on
    # which the coloured states are tiny; zoom to the USA instead.
    geo = list(scope = "usa", projection = list(type = "albers usa"))
  )